# Setup
rm(list=ls())
setwd("/Users/ac/Documents/columbia/study/2015fall/G5999 Master Thesis/Thesis Writing/Data&R")
source("/Users/ac/Documents/columbia/study/2015fall/G5999 Master Thesis/Thesis Writing/Data&R/data functions.R")
LoadLibrary()

y3 <- read.csv("entire dataset final.csv", header = T)
y3[,1] <- as.character(y3[,1])
y3[,2] <- as.character(y3[,2])
y3$time_trend <- y3$year


# 0. Descriptive Statistics
## 0.1 Yield
thosestates <- c("CO", "IL", "IN", "IA", "KS", "KY", "MI", "MN", "MO", 
                 "NE", "NC", "ND", "OH", "PA", "SD", "TN", "TX", "WI")
#par(mfrow=c(6,3))
ggplot(y3, aes(x=year, y=grain_yi, color=state_abbr)) + 
  geom_line(size=0.5) +
  #xlim(1995,2014) + 
  scale_x_continuous(breaks=seq(1995,2014,1)) +
  theme(axis.text.x = element_text(angle = 30)) +
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank() ) +
  ggtitle("Corn Yield of the 18 States in the U.S., 1995-2014") + 
  xlab("Year") + 
  ylab("Yield (bu/acre)")
ggsave("Yield Descriptice Stats ori.jpg")

sixstates <- c("IL", "IN", "MO", "NE", "IA", "OH") 
  # Those are states with highest yield: Illinois Indiana Missouri Nebraska Iowa Ohio
y3_six <- subset(y3, y3$state_abbr==sixstates)
ggplot(y3_six, aes(x=year, y=grain_yi, color=state_abbr)) + 
  geom_line(size=0.5) +
  # xlim(1995,2014) + 
  scale_x_continuous(breaks=seq(1995,2014,1)) +
  theme(axis.text.x = element_text(angle = 30)) +
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank() ) +
  ggtitle("Corn Yield of the 18 States in the U.S., 1995-2014") + 
  xlab("Year") + 
  ylab("Yield (bu/acre)")
ggsave("Yield Descriptive Stats for Six States ori.jpg")

## 0.2 Production
ggplot(y3, aes(x=year, y=grain_prod, color=state_abbr)) + 
  geom_line(size=0.5) +
  #xlim(1995,2014) + 
  scale_x_continuous(breaks=seq(1995,2014,1)) +
  theme(axis.text.x = element_text(angle = 30)) +
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank() ) +
  ggtitle("Corn Production of the 18 States in the U.S., 1995-2014") + 
  xlab("Year") + 
  ylab("Production (bushel)")
ggsave("Production Descriptive Stats ori.jpg")



##################################################################################################
##################################################################################################
# Choices of Models
# Selections: 
#      1. Dependent Variable:
#         1.1 grain_yi;
#         1.2 grain_prod
#      2. Climate Variables:
#         2.1 prcp_6 + tmean_6;
#         2.2 prcp_7 + tmean_7;
#         2.3 prcp_6 + tmean_6 + prcp_7 + tmean_7;
#         2.4 prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8;
#      3. Time Term (prefer usnig time trend as in other literature)
#         time trend;
#         time as factor
# Rest of the covariates:
#         price_last + acre + income_cat
##################################################################################################
##################################################################################################

# 1. grain_yi + prcp_6 + tmean_6
## 1.1 OLS
ols1 <- plm(grain_yi ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
## 1.2 FE
fe1 <- plm(grain_yi ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
## 1.3 RE 
re1 <- plm(grain_yi ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
## 1.4 Hausman Test
phtest(fe1, re1) # Hausman test, which support Fixed Effects Model
## 1.5 Regression Results Table
stargazer(ols1,fe1,re1, title = "Treatment 1: Yield and June climate", aligh=T,
          dep.var.labels = c("Yield (Bu/Acre)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedYield(fe1, "Yield and Fitted Yield of Model 1")
ggsave("fe1 y and fitted y.jpg")


# 2. grain_yi + prcp_7 + tmean_7
ols2 <- plm(grain_yi ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe2 <- plm(grain_yi ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re2 <- plm(grain_yi ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe2, re2) # Hausman test, which support Random Effects Model
stargazer(ols2,fe2,re2, title = "Treatment 2: Yield and July climate", aligh=T,
          dep.var.labels = c("Yield (Bu/Acre)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedYield(fe2, "Yield and Fitted Yield of Model 2")
ggsave("fe2 y and fitted y.jpg")

# 3. grain_yi + prcp_6 + tmean_6 + prcp_7 + tmean_7
ols3 <- plm(grain_yi ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe3 <- plm(grain_yi ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re3 <- plm(grain_yi ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe3, re3) # Hausman test, which support Random Effects Model
stargazer(ols3,fe3,re3, title = "Treatment 3: Yield and June&July climate", aligh=T,
          dep.var.labels = c("Yield (Bu/Acre)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedYield(fe3, "Yield and Fitted Yield of Model 3")
ggsave("fe3 y and fitted y.jpg")

# 4. grain_yi + prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8
ols4 <- plm(grain_yi ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe4 <- plm(grain_yi ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re4 <- plm(grain_yi ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe4, re4) # Hausman test, which support Random Effects Model
stargazer(ols4,fe4,re4, title = "Treatment 4: Yield and May to Aug climate", aligh=T,
          dep.var.labels = c("Yield (Bu/Acre)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedYield(re4, "Yield and Fitted Yield of Model 4")
ggsave("fe4 y and fitted y actually re4.jpg")


# 5. grain_prod + prcp_6 + tmean_6
ols5 <- plm(grain_prod ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe5 <- plm(grain_prod ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re5 <- plm(grain_prod ~ prcp_6 + tmean_6 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe5, re5) # Hausman test, which support Fixed Effects Model
stargazer(ols5,fe5,re5, title = "Treatment 5: Production and June climate", aligh=T,
          dep.var.labels = c("Production (Bu)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedProduction(fe5, "Production and Fitted Production of Model 5")
ggsave("fe5 y and fitted y.jpg")

# 6. grain_prod + prcp_7 + tmean_7
ols6 <- plm(grain_prod ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe6 <- plm(grain_prod ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re6 <- plm(grain_prod ~ prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe6, re6) # Hausman test, which support Random Effects Model
stargazer(ols6,fe6,re6, title = "Treatment 6: Production and July climate", aligh=T,
          dep.var.labels = c("Production (Bu)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedProduction(fe6, "Production and Fitted Production of Model 6")
ggsave("fe6 y and fitted y.jpg")


# 7. grain_prod + prcp_6 + tmean_6 + prcp_7 + tmean_7
ols7 <- plm(grain_prod ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe7 <- plm(grain_prod ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re7 <- plm(grain_prod ~ prcp_6 + tmean_6 + prcp_7 + tmean_7 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe7, re7) # Hausman test, which support Random Effects Model
stargazer(ols7,fe7,re7, title = "Treatment 7: Production and June&July climate", aligh=T,
          dep.var.labels = c("Production (Bu)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedProduction(fe7, "Production and Fitted Production of Model 7")
ggsave("fe7 y and fitted y.jpg")

# 8. grain_prod + prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8
ols8 <- plm(grain_prod ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
           data = y3, model = "pooling", index = c("state_abbr", "year"))
fe8 <- plm(grain_prod ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
          data = y3, model = "within", index = c("state_abbr", "year"))
re8 <- plm(grain_prod ~ prcp_5 + tmean_5 + prcp_6 + tmean_6 + prcp_7 + tmean_7 + prcp_8 + tmean_8 + price_last + acre + income_cat + time_trend,
          data = y3, model = "random", index = c("state_abbr", "year"))
phtest(fe8, re8) # Hausman test, which support Random Effects Model
stargazer(ols8,fe8,re8, title = "Treatment 8: Production and May to Aug climate", aligh=T,
          dep.var.labels = c("Production (Bu)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("OLS", "FE Model", "RE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

PlotFittedProduction(fe8, "Production and Fitted Production of Model 8")
ggsave("fe8 y and fitted y.jpg")

# 9. Important Tables
stargazer(fe3,fe7, title = "Yield/Production and June&July climate", aligh=T,
          dep.var.labels = c("Yield (Bu/Acre)", "Production (Bu)"),
          #covariate.labels = c("JunePrcp", "JuneMeanTemp", "Income", "PlantArea", 1996:2014),
          no.space = T,
          omit.stat = c("LL","ser","f"),
          column.labels = c("Yield FE Model", "Production FE Model"),
          dep.var.caption = "",
          model.numbers = F, type = "text"
)

